{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## CNN Text Classification with Keras"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
      "  from ._conv import register_converters as _register_converters\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[i] Loaded Parameters:\n",
      " 40000 35 0.2 200 \n",
      " dataset/glove/glove.twitter.27B.200d.txt\n",
      "[i] Importing Modules...\n",
      "[+] Using Keras version 2.0.8\n",
      "[i] Finished Importing Modules\n",
      "[i] . Reading from csv file...Done!\n",
      "[i] Found 33856 unique tokens.\n",
      "[+] Shape of data tensor: (45318, 30)\n",
      "[+] Shape of label tensor: (45318, 6)\n",
      "[+] Number of entries in each category:\n",
      "[+] Training:\n",
      " [ 7754.  8268.  4773. 12722.  1846.   892.]\n",
      "[+] Validation:\n",
      " [1890. 2002. 1256. 3220.  477.  218.]\n",
      "[i] Loading GloVe from: dataset/glove/glove.twitter.27B.200d.txt ...Done.\n",
      "[+] Proceeding with Embedding Matrix...Completed!\n",
      "Finished running setup.\n"
     ]
    }
   ],
   "source": [
    "import plaidml.keras\n",
    "plaidml.keras.install_backend()\n",
    "\n",
    "%run Setup.ipynb\n",
    "%run ExtraFunctions.ipynb"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Load embedding matrix into an `Embedding` layer. Toggle `trainable=False` to prevent the weights from being updated during training."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "embedding_layer = Embedding(len(word_index) + 1,\n",
    "                            EMBEDDING_DIM,\n",
    "                            weights=[embedding_matrix],\n",
    "                            input_length=MAX_SEQUENCE_LENGTH,\n",
    "                            trainable=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Deeper 1D CNN\n",
    "[Reference](https://github.com/richliao/textClassifier), [Source](https://blog.keras.io/using-pre-trained-word-embeddings-in-a-keras-model.html) and [Notes](http://www.wildml.com/2015/11/understanding-convolutional-neural-networks-for-nlp/)\n",
    "\n",
    "Deeper Convolutional neural network: In [CNN for Sentence Classification](http://www.aclweb.org/anthology/D14-1181) (Yoon Kim, 2014), multiple filters have been applied. This can be implemented using Keras `Merge` Layer."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "INFO:plaidml:b'Opening device \"ati_radeon_pro_560_compute_engine.0\"'\n"
     ]
    }
   ],
   "source": [
    "sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')\n",
    "embedded_sequences = embedding_layer(sequence_input)\n",
    "\n",
    "convs, filter_sizes = [], [2,3,5]\n",
    "for fsz in filter_sizes:\n",
    "    l_conv = Conv1D(filters=16,kernel_size=fsz,activation='relu')(embedded_sequences)\n",
    "    l_pool = MaxPooling1D(2)(l_conv)\n",
    "    convs.append(l_pool)\n",
    "\n",
    "l_merge = Concatenate(axis=1)(convs)\n",
    "l_cov1= Conv1D(24, 3, activation='relu')(l_merge)\n",
    "l_pool1 = MaxPooling1D(2)(l_cov1)\n",
    "l_drop1 = Dropout(0.4)(l_pool1)\n",
    "l_cov2 = Conv1D(16, 3, activation='relu')(l_drop1)\n",
    "l_pool2 = MaxPooling1D(17)(l_cov2) # global max pooling\n",
    "l_flat = Flatten()(l_pool2)\n",
    "l_dense = Dense(16, activation='relu')(l_flat)\n",
    "preds = Dense(6, activation='softmax')(l_dense)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "model = Model(sequence_input, preds)\n",
    "adadelta = optimizers.Adadelta(lr=1.0, rho=0.95, epsilon=None, decay=0.0)\n",
    "model.compile(loss='categorical_crossentropy',\n",
    "              optimizer=\"rmsprop\",\n",
    "              metrics=['accuracy'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "#tensorboard = callbacks.TensorBoard(log_dir='./logs', histogram_freq=0, batch_size=16, write_grads=True , write_graph=True)\n",
    "model_checkpoints = callbacks.ModelCheckpoint(\"checkpoints\", monitor='val_loss', verbose=0, save_best_only=True, save_weights_only=False, mode='auto', period=1)\n",
    "lr_schedule = callbacks.LearningRateScheduler(initial_boost)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "rm: logs: No such file or directory\r\n"
     ]
    }
   ],
   "source": [
    "!rm -R logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "____________________________________________________________________________________________________\n",
      "Layer (type)                     Output Shape          Param #     Connected to                     \n",
      "====================================================================================================\n",
      "input_1 (InputLayer)             (None, 30)            0                                            \n",
      "____________________________________________________________________________________________________\n",
      "embedding_1 (Embedding)          (None, 30, 200)       6771400     input_1[0][0]                    \n",
      "____________________________________________________________________________________________________\n",
      "conv1d_1 (Conv1D)                (None, 29, 16)        6416        embedding_1[0][0]                \n",
      "____________________________________________________________________________________________________\n",
      "conv1d_2 (Conv1D)                (None, 28, 16)        9616        embedding_1[0][0]                \n",
      "____________________________________________________________________________________________________\n",
      "conv1d_3 (Conv1D)                (None, 26, 16)        16016       embedding_1[0][0]                \n",
      "____________________________________________________________________________________________________\n",
      "max_pooling1d_1 (MaxPooling1D)   (None, 14, 16)        0           conv1d_1[0][0]                   \n",
      "____________________________________________________________________________________________________\n",
      "max_pooling1d_2 (MaxPooling1D)   (None, 14, 16)        0           conv1d_2[0][0]                   \n",
      "____________________________________________________________________________________________________\n",
      "max_pooling1d_3 (MaxPooling1D)   (None, 13, 16)        0           conv1d_3[0][0]                   \n",
      "____________________________________________________________________________________________________\n",
      "concatenate_1 (Concatenate)      (None, 41, 16)        0           max_pooling1d_1[0][0]            \n",
      "                                                                   max_pooling1d_2[0][0]            \n",
      "                                                                   max_pooling1d_3[0][0]            \n",
      "____________________________________________________________________________________________________\n",
      "conv1d_4 (Conv1D)                (None, 39, 24)        1176        concatenate_1[0][0]              \n",
      "____________________________________________________________________________________________________\n",
      "max_pooling1d_4 (MaxPooling1D)   (None, 19, 24)        0           conv1d_4[0][0]                   \n",
      "____________________________________________________________________________________________________\n",
      "dropout_1 (Dropout)              (None, 19, 24)        0           max_pooling1d_4[0][0]            \n",
      "____________________________________________________________________________________________________\n",
      "conv1d_5 (Conv1D)                (None, 17, 16)        1168        dropout_1[0][0]                  \n",
      "____________________________________________________________________________________________________\n",
      "max_pooling1d_5 (MaxPooling1D)   (None, 1, 16)         0           conv1d_5[0][0]                   \n",
      "____________________________________________________________________________________________________\n",
      "flatten_1 (Flatten)              (None, 16)            0           max_pooling1d_5[0][0]            \n",
      "____________________________________________________________________________________________________\n",
      "dense_1 (Dense)                  (None, 16)            272         flatten_1[0][0]                  \n",
      "____________________________________________________________________________________________________\n",
      "dense_2 (Dense)                  (None, 6)             102         dense_1[0][0]                    \n",
      "====================================================================================================\n",
      "Total params: 6,806,166\n",
      "Trainable params: 34,766\n",
      "Non-trainable params: 6,771,400\n",
      "____________________________________________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "model.summary()\n",
    "model.save(\"cnn_kim.h5\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Training Progress:\n",
      "Train on 36255 samples, validate on 9063 samples\n",
      "Epoch 1/20\n",
      "  200/36255 [..............................] - ETA: 2841s - loss: 1.6981 - acc: 0.2100"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-9-fe3c98f58f9a>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Training Progress:\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      2\u001b[0m model.fit(x_train, y_train, validation_data=(x_val, y_val),\n\u001b[0;32m----> 3\u001b[0;31m           epochs=20, batch_size=50)\n\u001b[0m",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36mfit\u001b[0;34m(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, **kwargs)\u001b[0m\n\u001b[1;32m   1596\u001b[0m                               \u001b[0minitial_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minitial_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1597\u001b[0m                               \u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msteps_per_epoch\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1598\u001b[0;31m                               validation_steps=validation_steps)\n\u001b[0m\u001b[1;32m   1599\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1600\u001b[0m     def evaluate(self, x, y,\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_fit_loop\u001b[0;34m(self, f, ins, out_labels, batch_size, epochs, verbose, callbacks, val_f, val_ins, shuffle, callback_metrics, initial_epoch, steps_per_epoch, validation_steps)\u001b[0m\n\u001b[1;32m   1181\u001b[0m                     \u001b[0mbatch_logs\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'size'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_ids\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1182\u001b[0m                     \u001b[0mcallbacks\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_batch_begin\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_logs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1183\u001b[0;31m                     \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1184\u001b[0m                     \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1185\u001b[0m                         \u001b[0mouts\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mouts\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plaidml/keras/backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m    997\u001b[0m             \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_invoker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_output\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    998\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 999\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_invoker\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minvoke\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1000\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1001\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_ndarray\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0m_ctx\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mt\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mtensors\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plaidml/__init__.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1421\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1422\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0minvoke\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1423\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mInvocation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_ctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1424\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1425\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plaidml/__init__.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, ctx, invoker)\u001b[0m\n\u001b[1;32m   1427\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1428\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minvoker\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1429\u001b[0;31m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_as_parameter_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplaidml_schedule_invocation\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mctx\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minvoker\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m   1430\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_free\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_lib\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplaidml_free_invocation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m   1431\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/plaidml/__init__.py\u001b[0m in \u001b[0;36m_check_err\u001b[0;34m(self, result, func, args)\u001b[0m\n\u001b[1;32m    709\u001b[0m         \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplaidml_compute_grad_wrt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0merrcheck\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_check_err\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    710\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 711\u001b[0;31m     \u001b[0;32mdef\u001b[0m \u001b[0m_check_err\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfunc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0margs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    712\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    713\u001b[0m             \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "print(\"Training Progress:\")\n",
    "model.fit(x_train, y_train, validation_data=(x_val, y_val),\n",
    "          epochs=20, batch_size=50)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
